**********************************************************
**** This do.file builds outcomes for causal analysis ****
**********************************************************
global db "D:\Dropbox\unequal_gains\main_data"
global Section4 "D:\Dropbox\unequal_gains\QJE revision plan\analysis\section4_data"


******************************************************
** 1) New/exiting goods & total change in spending ***
******************************************************

* A) HMS
use "$db/Important Price Datasets/new_exit_QM_2004.dta", clear
foreach i of numlist 2005(1)2015 {
append using "$db/Important Price Datasets/new_exit_QM_`i'.dta"
}
* keep track of spending in long format 
foreach i of numlist 2004(1)2015 {
gen temp=total_spending_QM*(year==`i')
bysort product_module_code quality_rank: egen total_spending_QM_`i'=max(temp)
drop temp
}
* collapse dataset after keep only "balanced" panel
egen id=group(product_module_code quality_rank)
tsset id year
rename ssep temp
gen ssepQM=L.temp
drop temp
drop if year==2004
* make sure we have a balanced panel: observe 11 years & have 10 quality deciles for each module
bysort product_module_code: gen N_QMyears=[_N]
keep if N_QMy==110
drop N_QMy
gen feenstra=(1-ssnp)/(1-ssep)
* there is a small number of obs for which ssnp=1 or ssep=0, then we can't compute feenstra
replace ssnp=0 if missing(feenstra)
replace ssep=0 if missing(feenstra)
replace feenstra=1 if missing(feenstra)
bysort department_code year: egen p5 = pctile(feenstra), p(5)
bysort department_code year: egen p95 = pctile(feenstra), p(95)
gen feenstra_win = feenstra
replace feenstra_win = p95 if feenstra>p95
replace feenstra_win = p5 if feenstra<p5

* now bring in elasticities
merge m:1 product_module_code using "$db/Important Price Datasets/sigmas", keepusing(sigma_common)
drop if _merge==2
drop _merge

foreach i in feenstra_win {
gen `i'_common=`i'^(1/(sigma_common-1))
gen `i'_Handbury=`i'^(1/(2.09-1))
gen `i'_Dube=`i'^(1/(4-1))
gen `i'_MR=`i'^(1/(7-1))
gen `i'_BW=`i'^(1/(11.5-1))
}
* save at disaggregated level to compute full inflation below
preserve
keep product_module_code quality feenstra_win_* year
save "$Section4/temp.dta", replace
restore

collapse (mean) ssnp ssep feenstra feenstra_win, by(quality_rank department_code product_group_code product_module_code total_spending_QM_2*)
* generate log spending variables
gen AvgDeltaLog_totspending = ( ///
log( (total_spending_QM_2013+total_spending_QM_2014+total_spending_QM_2015)/3 ) ///
- log( (total_spending_QM_2004+total_spending_QM_2005+total_spending_QM_2006)/3) ) / (2015-2006)
gen AvgLog_totspending = ( ///
log( (total_spending_QM_2013+total_spending_QM_2014+total_spending_QM_2015)/3 ) ///
+ log( (total_spending_QM_2004+total_spending_QM_2005+total_spending_QM_2006)/3) ) / 2
gen HMS_newexit=1
save "$Section4/outcomes.dta", replace

* B) RMS
use "$db/Important Price Datasets/RMSnew_exit_QM_2006.dta", clear
foreach i of numlist 2007(1)2015 {
append using "$db/Important Price Datasets/RMSnew_exit_QM_`i'.dta"
}
* keep track of spending in long format 
foreach i of numlist 2006(1)2015 {
gen temp=total_spending_QM*(year==`i')
bysort product_module_code quality_rank: egen total_spending_QM_`i'=max(temp)
drop temp
}
* collapse dataset after keep only "balanced" panel
egen id=group(product_module_code quality_rank)
tsset id year
rename ssep temp
gen ssepQM=L.temp
drop temp
drop if year==2006
* make sure we have a balanced panel: 
bysort product_module_code: gen N_QMyears=[_N]
* have either 80 or 90 observations (since year 2010 missing for lots of them,
* we have either 8 or 9 years)
keep if N_QMy==80 | N_QMy==90
drop N_QMy

gen feenstra=(1-ssnp)/(1-ssep)
* there is a small number of obs for which ssnp=1 or ssep=0, then we can't compute feenstra
replace ssnp=0 if missing(feenstra)
replace ssep=0 if missing(feenstra)
replace feenstra=1 if missing(feenstra)

* winsorize feenstra at 5% level (by department/year)
bysort department_code year: egen p5 = pctile(feenstra), p(5)
bysort department_code year: egen p95 = pctile(feenstra), p(95)
gen feenstra_win = feenstra
replace feenstra_win = p95 if feenstra>p95
replace feenstra_win = p5 if feenstra<p5

* ensure that product modules don't switch product groups over time (happens for 20 obs)
bysort product_module_code: egen temp=min(product_group_code)
replace product_group_code=temp if product_group_code!=temp

* now bring in elasticities
merge m:1 product_module_code using "$db/Important Price Datasets/sigmas"
drop if _merge==2
drop _merge

foreach i in feenstra_win {
gen `i'_common=`i'^(1/(sigma_common-1))
gen `i'_Handbury=`i'^(1/(2.09-1))
gen `i'_Dube=`i'^(1/(4-1))
gen `i'_MR=`i'^(1/(7-1))
gen `i'_BW=`i'^(1/(11.5-1))
}
* save at disaggregated level to compute full inflation below
preserve
keep product_module_code quality feenstra_win_* year
save "$Section4/temp_RMS.dta", replace
restore

collapse (mean) ssnp ssep feenstra feenstra_win, by(quality_rank department_code product_group_code product_module_code total_spending_QM_2*)

* generate log spending variables
gen AvgDeltaLog_totspending = ( ///
log( (total_spending_QM_2014+total_spending_QM_2015)/2 ) ///
- log( (total_spending_QM_2007+total_spending_QM_2006)/2) ) / (2015-2007)
gen AvgLog_totspending = ( ///
log( (total_spending_QM_2014+total_spending_QM_2015)/2 ) ///
+ log( (total_spending_QM_2007+total_spending_QM_2006)/2) ) / 2

* now rename var and merge to main dataset
foreach i in  total_spending_QM_2006 total_spending_QM_2007 total_spending_QM_2008 ///
  total_spending_QM_2009 total_spending_QM_2010 total_spending_QM_2011 total_spending_QM_2012 ///
  total_spending_QM_2013 total_spending_QM_2014 total_spending_QM_2015 ssnpQM ssepQM feenstra ///
  feenstra_win AvgDeltaLog_totspending AvgLog_totspending {
rename `i' RMS_`i'
}
gen RMS_newexit=1
merge 1:1 product_module_code quality_rank using "$Section4/outcomes.dta"
replace RMS_newexit=0 if _merge==2
replace HMS_newexit=0 if _merge==1
drop _merge
save "$Section4/outcomes.dta", replace


************************************************************
** 2) Inflation & change in spending for continued goods ***
************************************************************


* A) HMS

use "$db/Important Price Datasets/inflation_QM_2004.dta", clear
gen year=2004
foreach i of numlist 2005(1)2014 {
append using "$db/Important Price Datasets/inflation_QM_`i'.dta"
replace year=`i' if missing(year)
}
* keep track of spending in long format 
foreach i of numlist 2004(1)2014 {
gen temp=(total_spending_QM+total_spending_QM_old)/2*(year==`i')
bysort product_module_code quality_rank: egen continued_spending_QM_`i'=max(temp)
drop temp
}
* collapse dataset after keep only "balanced" panel
* make sure we have a balanced panel: observe 11 years & have 10 quality deciles for each module
bysort product_module_code: gen N_QMyears=[_N]
keep if N_QMy==110
drop N_QMy
* 86% of observations are full (11 years times 10 quality deciles=110 obs.) 
* but they account for 98% of spending
foreach i in tornqvist_price_index ces_price_index ces_quantum_index laspeyres_price_index paasche_price_index {
bysort department_code year: egen p5 = pctile(`i'), p(5)
bysort department_code year: egen p95 = pctile(`i'), p(95)
gen `i'_win = `i'
replace `i'_win = p95 if `i'>p95
replace `i'_win = p5 if `i'<p5
drop p5 p95
}

* now bring the feenstra ratio 
replace year=year+1
merge 1:1 product_module_code quality year using "$Section4/temp.dta", keepusing(feenstra*)
drop if _merge==2
drop _merge

foreach i in common Handbury Dube MR BW {
gen fullces_price_index_win_`i'=ces_price_index_win*feenstra_win_`i'
gen lfullces_pi_win_`i'=log(ces_price_index_win*feenstra_win_`i')
}

collapse (mean) paasche* laspeyres* tornqvist* ces* fullces_* lfullces_*, by(quality_rank department_code product_group_code product_module_code continued_spending_QM_2*)
distinct product_module_code

* generate log spending variables
gen AvgDeltaLog_contspending = ( ///
log( (continued_spending_QM_2012+continued_spending_QM_2013+continued_spending_QM_2014)/3 ) ///
- log( (continued_spending_QM_2004+continued_spending_QM_2005+continued_spending_QM_2006)/3) ) / (2015-2006-1)
gen AvgLog_contspending = ( ///
log( (continued_spending_QM_2012+continued_spending_QM_2013+continued_spending_QM_2014)/3 ) ///
+ log( (continued_spending_QM_2004+continued_spending_QM_2005+continued_spending_QM_2006)/3) ) / 2

* merge to outcomes file
gen HMS_continued=1
merge 1:1 product_module_code quality using "$Section4/outcomes.dta"
drop _merge
save "$Section4/outcomes.dta", replace
 
* B) RMS
use "$db/Important Price Datasets/inflation_QM_2006_RMS.dta", clear
gen year=2006
foreach i of numlist 2007(1)2014 {
append using "$db/Important Price Datasets/inflation_QM_`i'_RMS.dta"
replace year=`i' if missing(year)
}
* keep track of spending in long format 
foreach i of numlist 2006(1)2014 {
gen temp=(total_spending_QM+total_spending_QM_old)/2*(year==`i')
bysort product_module_code quality_rank: egen continued_spending_QM_`i'=max(temp)
drop temp
}
* collapse dataset after keep only "balanced" panel
bysort product_module_code: gen N_QMyears=[_N]
* have either 70 or 90 observations (since year 2010 missing for lots of them,
* we have either 7 or 9 years -- for some we lose 2 years because can't match to previous/next years)
keep if N_QMy==70 | N_QMy==90
drop N_QMy

foreach i in tornqvist_price_index ces_price_index ces_quantum_index laspeyres_price_index paasche_price_index {
bysort department_code year: egen p5 = pctile(`i'), p(5)
bysort department_code year: egen p95 = pctile(`i'), p(95)
gen `i'_win = `i'
replace `i'_win = p95 if `i'>p95
replace `i'_win = p5 if `i'<p5
drop p5 p95
}

* ensure that product modules don't switch product groups over time (happens for 20 obs)
bysort product_module_code: egen temp=min(product_group_code)
replace product_group_code=temp if product_group_code!=temp

* now bring the feenstra ratio 
replace year=year+1
merge 1:1 product_module_code quality year using "$Section4/temp_RMS.dta", keepusing(feenstra*)
drop if _merge==2
drop _merge

foreach i in common Handbury Dube MR BW {
gen fullces_pi_win_`i'=ces_price_index_win*feenstra_win_`i'
gen lfullces_pi_win_`i'=log(ces_price_index_win*feenstra_win_`i')
}

collapse (mean) paasche* laspeyres* tornqvist* ces* fullces* lfullces*, by(quality_rank department_code product_group_code product_module_code continued_spending_QM_2*)
distinct product_module_code

* generate log spending variables
gen AvgDeltaLog_contspending = ( ///
log( (continued_spending_QM_2013+continued_spending_QM_2014)/2 ) ///
- log( (continued_spending_QM_2007+continued_spending_QM_2006)/2) ) / (2015-2007-1)
gen AvgLog_contspending = ( ///
log( (continued_spending_QM_2013+continued_spending_QM_2014)/2 ) ///
+ log( (continued_spending_QM_2007+continued_spending_QM_2006)/2) ) / 2

* now rename var and merge to main dataset
foreach i in continued_spending_QM_2006 continued_spending_QM_2007 continued_spending_QM_2008 ///
continued_spending_QM_2009 continued_spending_QM_2010 continued_spending_QM_2011 continued_spending_QM_2012 ///
continued_spending_QM_2013 continued_spending_QM_2014 paasche_price_index paasche_price_index_win laspeyres_price_index ///
laspeyres_price_index_win tornqvist_price_index tornqvist_price_index_win ces_price_index ces_quantum_index ///
ces_price_index_win ces_quantum_index_win AvgDeltaLog_contspending AvgLog_contspending ///
 fullces_pi_win_common fullces_pi_win_Handbury fullces_pi_win_Dube fullces_pi_win_MR fullces_pi_win_BW ///
 lfullces_pi_win_common lfullces_pi_win_Handbury lfullces_pi_win_Dube lfullces_pi_win_MR lfullces_pi_win_BW {
rename `i' RMS_`i'
}
gen RMS_continued=1
merge 1:1 product_module_code quality_rank using "$Section4/outcomes.dta"
drop _merge

* now finalize dataset
foreach i in RMS_continued HMS_continued RMS_newexit HMS_newexit {
replace `i'=0 if missing(`i')
}

save "$Section4/outcomes.dta", replace





